/***********************************************************************

	This file is part of KEEL-software, the Data Mining tool for regression, 
	classification, clustering, pattern mining and so on.

	Copyright (C) 2004-2010
	
	F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see http://www.gnu.org/licenses/
  
**********************************************************************/

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Instances.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package keel.Algorithms.SVM.SMO.core;

import keel.Dataset.*;

import org.core.Randomize;

import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 * Class for handling an ordered set of weighted instances. <p>
 *
 * Typical usage: <p>
 * <pre>
 * import keel.Algorithms.SVM.SMO.core.converters.ConverterUtils.DataSource;
 * ...
 * 
 * // Read all the instances in the file (ARFF, CSV, XRFF, ...)
 * DataSource source = new DataSource(filename);
 * Instances instances = source.getDataSet();
 *
 * // Make the last attribute be the class
 * instances.setClassIndex(instances.numAttributes() - 1);
 * 
 * // Print header and instances.
 * System.out.println("\nDataset:\n");
 * System.out.println(instances);
 * 
 * ...
 * </pre><p>
 *
 * All methods that change a set of instances are safe, ie. a change
 * of a set of instances does not affect any other sets of
 * instances. All methods that change a datasets's attribute
 * information clone the dataset before it is changed.
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.1 $ 
 */
//Modified to work with KEEL attributes, instead of Weka ones (Julian)
public class Instances 
implements Serializable {

	/** for serialization */
	static final long serialVersionUID = -19412345060742748L;

	/** The filename extension that should be used for arff files */
	public final static String FILE_EXTENSION = ".arff";

	/** The filename extension that should be used for bin. serialized instances files */
	public final static String SERIALIZED_OBJ_FILE_EXTENSION = ".bsi";

	/** The keyword used to denote the start of an arff header */
	public final static String ARFF_RELATION = "@relation";

	/** The keyword used to denote the start of the arff data section */
	public final static String ARFF_DATA = "@data";

	/** The dataset's name. */
	protected /*@spec_public non_null@*/ String m_RelationName;         

	/** The attribute information. */
	protected /*@spec_public non_null@*/ FastVector m_Attributes;
	/*  public invariant (\forall int i; 0 <= i && i < m_Attributes.size(); 
                    m_Attributes.elementAt(i) != null);
	 */

	/** The instances. */
	protected /*@spec_public non_null@*/ FastVector m_Instances;

	/** The class attribute's index */
	protected int m_ClassIndex;
	//@ protected invariant classIndex() == m_ClassIndex;

	/** The lines read so far in case of incremental loading. Since the 
	 * StreamTokenizer will be re-initialized with every instance that is read,
	 * we have to keep track of the number of lines read so far. 
	 * @see #readInstance(Reader) */
	protected int m_Lines = 0;

	/**
	 * Reads an ARFF file from a reader, and assigns a weight of
	 * one to each instance. Lets the index of the class 
	 * attribute be undefined (negative).
	 *
	 * @param reader the reader
	 * @throws IOException if the ARFF file is not read 
	 * successfully
	 */
//	public Instances(/*@non_null@*/Reader reader) throws IOException {
//	ArffReader arff = new ArffReader(reader);
//	Instances dataset = arff.getData();
//	initialize(dataset, dataset.numInstances());
//	dataset.copyInstances(0, this, dataset.numInstances());
//	compactify();
//	}

	/**
	 * Reads the header of an ARFF file from a reader and 
	 * reserves space for the given number of instances. Lets
	 * the class index be undefined (negative).
	 *
	 * @param reader the reader
	 * @param capacity the capacity
	 * @throws IllegalArgumentException if the header is not read successfully
	 * or the capacity is negative.
	 * @throws IOException if there is a problem with the reader.
	 * @deprecated instead of using this method in conjunction with the
	 * <code>readInstance(Reader)</code> method, one should use the 
	 * <code>ArffLoader</code> or <code>DataSource</code> class instead.
	 * @see weka.core.converters.ArffLoader
	 * @see weka.core.converters.ConverterUtils.DataSource
	 */
	//@ requires capacity >= 0;
	//@ ensures classIndex() == -1;
//	@Deprecated public Instances(/*@non_null@*/Reader reader, int capacity)
//	throws IOException {

//	ArffReader arff = new ArffReader(reader, 0);
//	Instances header = arff.getStructure();
//	initialize(header, capacity);
//	m_Lines = arff.getLineNo();
//	}

	/**
	 * Constructor copying all instances and references to
	 * the header information from the given set of instances.
	 *
	 * @param dataset the set to be copied
	 */
	public Instances(/*@non_null@*/Instances dataset) {

		this(dataset, dataset.numInstances());

		dataset.copyInstances(0, this, dataset.numInstances());
	}

	/**
	 * Constructor creating an empty set of instances. Copies references
	 * to the header information from the given set of instances. Sets
	 * the capacity of the set of instances to 0 if its negative.
	 *
	 * @param dataset the instances from which the header 
	 * information is to be taken
	 * @param capacity the capacity of the new dataset 
	 */
	public Instances(/*@non_null@*/Instances dataset, int capacity) {
		initialize(dataset, capacity);
	}

	/**
	 * initializes with the header information of the given dataset and sets
	 * the capacity of the set of instances.
	 * 
	 * @param dataset the dataset to use as template
	 * @param capacity the number of rows to reserve
	 */
	protected void initialize(Instances dataset, int capacity) {
		if (capacity < 0)
			capacity = 0;

		// Strings only have to be "shallow" copied because
		// they can't be modified.
		m_ClassIndex   = dataset.m_ClassIndex;
		m_RelationName = dataset.m_RelationName;
		m_Attributes   = dataset.m_Attributes;
		m_Instances    = new FastVector(capacity);
	}

	/**
	 * Creates a new set of instances by copying a 
	 * subset of another set.
	 *
	 * @param source the set of instances from which a subset 
	 * is to be created
	 * @param first the index of the first instance to be copied
	 * @param toCopy the number of instances to be copied
	 * @throws IllegalArgumentException if first and toCopy are out of range
	 */
	//@ requires 0 <= first;
	//@ requires 0 <= toCopy;
	//@ requires first + toCopy <= source.numInstances();
	public Instances(/*@non_null@*/Instances source, int first, int toCopy) {

		this(source, toCopy);

		if ((first < 0) || ((first + toCopy) > source.numInstances())) {
			throw new IllegalArgumentException("Parameters first and/or toCopy out "+
			"of range");
		}
		source.copyInstances(first, this, toCopy);
	}

	/**
	 * Creates an empty set of instances. Uses the given
	 * attribute information. Sets the capacity of the set of 
	 * instances to 0 if its negative. Given attribute information
	 * must not be changed after this constructor has been used.
	 *
	 * @param name the name of the relation
	 * @param attInfo the attribute information
	 * @param capacity the capacity of the set
	 */
	public Instances(/*@non_null@*/String name, 
			/*@non_null@*/FastVector attInfo, int capacity) {

		m_RelationName = name;
		m_ClassIndex = -1;
		m_Attributes = attInfo;
//		for (int i = 0; i < numAttributes(); i++) {
//		attribute(i).setIndex(i);
//		}
		m_Instances = new FastVector(capacity);
	}

	/**
	 * Create a copy of the structure, but "cleanse" string types (i.e.
	 * doesn't contain references to the strings seen in the past).
	 * Also cleanses all relational attributes.
	 *
	 * @return a copy of the instance structure.
	 */
//	public Instances stringFreeStructure() {

//	FastVector atts = (FastVector)m_Attributes.copy();
//	for (int i = 0 ; i < atts.size(); i++) {
//	Attribute att = (Attribute)atts.elementAt(i);
//	if (att.type() == Attribute.STRING) {
//	atts.setElementAt(new Attribute(att.name(), (FastVector)null), i);
//	} else if (att.type() == Attribute.RELATIONAL) {
//	atts.setElementAt(new Attribute(att.name(), new Instances(att.relation(), 0)), i);
//	}
//	}
//	Instances result = new Instances(relationName(), atts, 0);
//	result.m_ClassIndex = m_ClassIndex;
//	return result;
//	}

	/**
	 * Adds one instance to the end of the set. 
	 * Shallow copies instance before it is added. Increases the
	 * size of the dataset if it is not large enough. Does not
	 * check if the instance is compatible with the dataset.
	 * Note: String or relational values are not transferred.
	 *
	 * @param instance the instance to be added
	 */
	public void add(/*@non_null@*/ Instance instance) {

		Instance newInstance = (Instance)instance.copy();

		newInstance.setDataset(this);
		m_Instances.addElement(newInstance);
	}

	/**
	 * Returns an attribute.
	 *
	 * @param index the attribute's index (index starts with 0)
	 * @return the attribute at the given position
	 */
	//@ requires 0 <= index;
	//@ requires index < m_Attributes.size();
	//@ ensures \result != null;
	public /*@pure@*/ Attribute attribute(int index) {

		return (Attribute) m_Attributes.elementAt(index);
	}

	/**
	 * Returns an attribute given its name. If there is more than
	 * one attribute with the same name, it returns the first one.
	 * Returns null if the attribute can't be found.
	 *
	 * @param name the attribute's name
	 * @return the attribute with the given name, null if the
	 * attribute can't be found
	 */ 
	public /*@pure@*/ Attribute attribute(String name) {

		for (int i = 0; i < numAttributes(); i++) {
			if (attribute(i).getName().equals(name)) {
				return attribute(i);
			}
		}
		return null;
	}

	/**
	 * Checks for attributes of the given type in the dataset
	 *
	 * @param attType  the attribute type to look for
	 * @return         true if attributes of the given type are present
	 */
	public boolean checkForAttributeType(int attType) {

		int i = 0;

		while (i < m_Attributes.size()) {
			if (attribute(i++).getType() == attType) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Checks for string attributes in the dataset
	 *
	 * @return true if string attributes are present, false otherwise
	 */
	public /*@pure@*/ boolean checkForStringAttributes() {
		return checkForAttributeType(Attribute.NOMINAL);
	}

	/**
	 * Checks if the given instance is compatible
	 * with this dataset. Only looks at the size of
	 * the instance and the ranges of the values for 
	 * nominal and string attributes.
	 *
	 * @param instance the instance to check
	 * @return true if the instance is compatible with the dataset 
	 */
	public /*@pure@*/ boolean checkInstance(Instance instance) {

		if (instance.numAttributes() != numAttributes()) {
			return false;
		}
		for (int i = 0; i < numAttributes(); i++) {
			if (instance.isMissing(i)) {
				continue;
			} else if (attribute(i).getType()==Attribute.NOMINAL) {
				if (!(Utils.eq(instance.value(i),
						(double)(int)instance.value(i)))) {
					return false;
				} else if (Utils.sm(instance.value(i), 0) ||
						Utils.gr(instance.value(i),
								attribute(i).getNumNominalValues())) {
					return false;
				}
			}
		}
		return true;
	}

	/**
	 * Returns the class attribute.
	 *
	 * @return the class attribute
	 * @throws UnassignedClassException if the class is not set
	 */
	//@ requires classIndex() >= 0;
	public /*@pure@*/ Attribute classAttribute() {

		if (m_ClassIndex < 0) {
			throw new UnassignedClassException("Class index is negative (not set)!");
		}
		return attribute(m_ClassIndex);
	}

	/**
	 * Returns the class attribute's index. Returns negative number
	 * if it's undefined.
	 *
	 * @return the class index as an integer
	 */
	// ensures \result == m_ClassIndex;
	public /*@pure@*/ int classIndex() {

		return m_ClassIndex;
	}

	/**
	 * Compactifies the set of instances. Decreases the capacity of
	 * the set so that it matches the number of instances in the set.
	 */
	public void compactify() {

		m_Instances.trimToSize();
	}

	/**
	 * Removes all instances from the set.
	 */
	public void delete() {

		m_Instances = new FastVector();
	}

	/**
	 * Removes an instance at the given position from the set.
	 *
	 * @param index the instance's position (index starts with 0)
	 */
	//@ requires 0 <= index && index < numInstances();
	public void delete(int index) {

		m_Instances.removeElementAt(index);
	}

	/**
	 * Deletes an attribute at the given position 
	 * (0 to numAttributes() - 1). A deep copy of the attribute
	 * information is performed before the attribute is deleted.
	 *
	 * @param position the attribute's position (position starts with 0)
	 * @throws IllegalArgumentException if the given index is out of range 
	 *            or the class attribute is being deleted
	 */
	//@ requires 0 <= position && position < numAttributes();
	//@ requires position != classIndex();
	public void deleteAttributeAt(int position) {

		if ((position < 0) || (position >= m_Attributes.size())) {
			throw new IllegalArgumentException("Index out of range");
		}
		if (position == m_ClassIndex) {
			throw new IllegalArgumentException("Can't delete class attribute");
		}
		freshAttributeInfo();
		if (m_ClassIndex > position) {
			m_ClassIndex--;
		}
		m_Attributes.removeElementAt(position);
		for (int i = position; i < m_Attributes.size(); i++) {
			Attribute current = (Attribute)m_Attributes.elementAt(i);
//			current.setIndex(current.index() - 1);
		}
		for (int i = 0; i < numInstances(); i++) {
			instance(i).forceDeleteAttributeAt(position); 
		}
	}

	/**
	 * Deletes all attributes of the given type in the dataset. A deep copy of 
	 * the attribute information is performed before an attribute is deleted.
	 *
	 * @param attType the attribute type to delete
	 * @throws IllegalArgumentException if attribute couldn't be 
	 * successfully deleted (probably because it is the class attribute).
	 */
	public void deleteAttributeType(int attType) {
		int i = 0;
		while (i < m_Attributes.size()) {
			if (attribute(i).getType() == attType) {
				deleteAttributeAt(i);
			} else {
				i++;
			}
		}
	}

	/**
	 * Deletes all string attributes in the dataset. A deep copy of the attribute
	 * information is performed before an attribute is deleted.
	 *
	 * @throws IllegalArgumentException if string attribute couldn't be 
	 * successfully deleted (probably because it is the class attribute).
	 * @see #deleteAttributeType(int)
	 */
	public void deleteStringAttributes() {
		deleteAttributeType(Attribute.NOMINAL);
	}

	/**
	 * Removes all instances with missing values for a particular
	 * attribute from the dataset.
	 *
	 * @param attIndex the attribute's index (index starts with 0)
	 */
	//@ requires 0 <= attIndex && attIndex < numAttributes();
	public void deleteWithMissing(int attIndex) {

		FastVector newInstances = new FastVector(numInstances());

		for (int i = 0; i < numInstances(); i++) {
			if (!instance(i).isMissing(attIndex)) {
				newInstances.addElement(instance(i));
			}
		}
		m_Instances = newInstances;
	}

	/**
	 * Removes all instances with missing values for a particular
	 * attribute from the dataset.
	 *
	 * @param att the attribute
	 */
	public void deleteWithMissing(/*@non_null@*/ Attribute att) {
		
		deleteWithMissing(indexOf(att));
	}

	/**
	 * Removes all instances with a missing class value
	 * from the dataset.
	 *
	 * @throws UnassignedClassException if class is not set
	 */
	public void deleteWithMissingClass() {

		if (m_ClassIndex < 0) {
			throw new UnassignedClassException("Class index is negative (not set)!");
		}
		deleteWithMissing(m_ClassIndex);
	}

	/**
	 * Returns an enumeration of all the attributes.
	 *
	 * @return enumeration of all the attributes.
	 */
	public /*@non_null pure@*/ Enumeration enumerateAttributes() {

		return m_Attributes.elements(m_ClassIndex);
	}

	/**
	 * Returns an enumeration of all instances in the dataset.
	 *
	 * @return enumeration of all instances in the dataset
	 */
	public /*@non_null pure@*/ Enumeration enumerateInstances() {

		return m_Instances.elements();
	}

	/**
	 * Checks if two headers are equivalent.
	 *
	 * @param dataset another dataset
	 * @return true if the header of the given dataset is equivalent 
	 * to this header
	 */
	public /*@pure@*/ boolean equalHeaders(Instances dataset){

		// Check class and all attributes
		if (m_ClassIndex != dataset.m_ClassIndex) {
			return false;
		}
		if (m_Attributes.size() != dataset.m_Attributes.size()) {
			return false;
		}
		for (int i = 0; i < m_Attributes.size(); i++) {
			if (!(attribute(i).equals(dataset.attribute(i)))) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Returns the first instance in the set.
	 *
	 * @return the first instance in the set
	 */
	//@ requires numInstances() > 0;
	public /*@non_null pure@*/ Instance firstInstance() {

		return (Instance)m_Instances.firstElement();
	}

	/**
	 * Returns a random number generator. The initial seed of the random
	 * number generator depends on the given seed and the hash code of
	 * a string representation of a instances chosen based on the given
	 * seed. 
	 *
	 * @param seed the given seed
	 * @return the random number generator
	 */
	public Random getRandomNumberGenerator(long seed) {

		Random r = new Random(seed);
		r.setSeed(instance(r.nextInt(numInstances())).toString().hashCode() + seed);
		return r;
	}

	/**
	 * Inserts an attribute at the given position (0 to 
	 * numAttributes()) and sets all values to be missing.
	 * Shallow copies the attribute before it is inserted, and performs
	 * a deep copy of the existing attribute information.
	 *
	 * @param att the attribute to be inserted
	 * @param position the attribute's position (position starts with 0)
	 * @throws IllegalArgumentException if the given index is out of range
	 */
	//@ requires 0 <= position;
	//@ requires position <= numAttributes();
	public void insertAttributeAt(/*@non_null@*/ Attribute att, int position) {

		if ((position < 0) ||
				(position > m_Attributes.size())) {
			throw new IllegalArgumentException("Index out of range");
		}
		freshAttributeInfo();
//		att.setIndex(position);
		m_Attributes.insertElementAt(att, position);
		for (int i = position + 1; i < m_Attributes.size(); i++) {
			Attribute current = (Attribute)m_Attributes.elementAt(i);
//			current.setIndex(indexOf(current) + 1);
		}
		for (int i = 0; i < numInstances(); i++) {
			instance(i).forceInsertAttributeAt(position);
		}
		if (m_ClassIndex >= position) {
			m_ClassIndex++;
		}
	}

	/**
	 * Returns the instance at the given position.
	 *
	 * @param index the instance's index (index starts with 0)
	 * @return the instance at the given position
	 */
	//@ requires 0 <= index;
	//@ requires index < numInstances();
	public /*@non_null pure@*/ Instance instance(int index) {

		return (Instance)m_Instances.elementAt(index);
	}

	/**
	 * Returns the kth-smallest attribute value of a numeric attribute.
	 * Note that calling this method will change the order of the data!
	 *
	 * @param att the Attribute object
	 * @param k the value of k
	 * @return the kth-smallest value
	 */
	public double kthSmallestValue(Attribute att, int k) {

		return kthSmallestValue(indexOf(att), k);
	}

	/**
	 * Returns the kth-smallest attribute value of a numeric attribute.
	 * Note that calling this method will change the order of the data!
	 * The number of non-missing values in the data must be as least
	 * as last as k for this to work.
	 *
	 * @param attIndex the attribute's index
	 * @param k the value of k
	 * @return the kth-smallest value
	 */
	public double kthSmallestValue(int attIndex, int k) {

		if (attribute(attIndex).getType()==Attribute.NOMINAL) {
			throw new IllegalArgumentException("Instances: attribute must be numeric to compute kth-smallest value.");
		}

		int i,j;

		// move all instances with missing values to end
		j = numInstances() - 1;
		i = 0;
		while (i <= j) {
			if (instance(j).isMissing(attIndex)) {
				j--;
			} else {
				if (instance(i).isMissing(attIndex)) {
					swap(i,j);
					j--;
				}
				i++;
			}
		}

		if ((k < 0) || (k > j)) {
			throw new IllegalArgumentException("Instances: value for k for computing kth-smallest value too large.");
		}

		return instance(select(attIndex, 0, j, k)).value(attIndex);
	}

	/**
	 * Returns the last instance in the set.
	 *
	 * @return the last instance in the set
	 */
	//@ requires numInstances() > 0;
	public /*@non_null pure@*/ Instance lastInstance() {

		return (Instance)m_Instances.lastElement();
	}

	/**
	 * Returns the mean (mode) for a numeric (nominal) attribute as
	 * a floating-point value. Returns 0 if the attribute is neither nominal nor 
	 * numeric. If all values are missing it returns zero.
	 *
	 * @param attIndex the attribute's index (index starts with 0)
	 * @return the mean or the mode
	 */
	public /*@pure@*/ double meanOrMode(int attIndex) {

		double result, found;
		int [] counts;

		if (!(attribute(attIndex).getType()==Attribute.NOMINAL)) {
			result = found = 0;
			for (int j = 0; j < numInstances(); j++) {
				if (!instance(j).isMissing(attIndex)) {
					found += instance(j).weight();
					result += instance(j).weight()*instance(j).value(attIndex);
				}
			}
			if (found <= 0) {
				return 0;
			} else {
				return result / found;
			}
		} else if (attribute(attIndex).getType()==Attribute.NOMINAL) {
			counts = new int[attribute(attIndex).getNumNominalValues()];
			for (int j = 0; j < numInstances(); j++) {
				if (!instance(j).isMissing(attIndex)) {
					counts[(int) instance(j).value(attIndex)] += instance(j).weight();
				}
			}
			return (double)Utils.maxIndex(counts);
		} else {
			return 0;
		}
	}

	/**
	 * Returns the mean (mode) for a numeric (nominal) attribute as a
	 * floating-point value.  Returns 0 if the attribute is neither
	 * nominal nor numeric.  If all values are missing it returns zero.
	 *
	 * @param att the attribute
	 * @return the mean or the mode 
	 */
	public /*@pure@*/ double meanOrMode(Attribute att) {

		return meanOrMode(indexOf(att));
	}

	/**
	 * Returns the number of attributes.
	 *
	 * @return the number of attributes as an integer
	 */
	//@ ensures \result == m_Attributes.size();
	public /*@pure@*/ int numAttributes() {

		return m_Attributes.size();
	}

	/**
	 * Returns the number of class labels.
	 *
	 * @return the number of class labels as an integer if the class 
	 * attribute is nominal, 1 otherwise.
	 * @throws UnassignedClassException if the class is not set
	 */
	//@ requires classIndex() >= 0;
	public /*@pure@*/ int numClasses() {

		if (m_ClassIndex < 0) {
			throw new UnassignedClassException("Class index is negative (not set)!");
		}
		if (!(classAttribute().getType()==Attribute.NOMINAL)) {
			return 1;
		} else {
			return classAttribute().getNumNominalValues();
		}
	}

	/**
	 * Returns the number of distinct values of a given attribute.
	 * Returns the number of instances if the attribute is a
	 * string attribute. The value 'missing' is not counted.
	 *
	 * @param attIndex the attribute (index starts with 0)
	 * @return the number of distinct values of a given attribute
	 */
	//@ requires 0 <= attIndex;
	//@ requires attIndex < numAttributes();
	public /*@pure@*/ int numDistinctValues(int attIndex) {

		if (!(attribute(attIndex).getType()==Attribute.NOMINAL)) {
			double [] attVals = attributeToDoubleArray(attIndex);
			int [] sorted = Utils.sort(attVals);
			double prev = 0;
			int counter = 0;
			for (int i = 0; i < sorted.length; i++) {
				Instance current = instance(sorted[i]);
				if (current.isMissing(attIndex)) {
					break;
				}
				if ((i == 0) || 
						(current.value(attIndex) > prev)) {
					prev = current.value(attIndex);
					counter++;
				}
			}
			return counter;
		} else {
			return attribute(attIndex).getNumNominalValues();
		}
	}

	/**
	 * Returns the number of distinct values of a given attribute.
	 * Returns the number of instances if the attribute is a
	 * string attribute. The value 'missing' is not counted.
	 *
	 * @param att the attribute
	 * @return the number of distinct values of a given attribute
	 */
	public /*@pure@*/ int numDistinctValues(/*@non_null@*/Attribute att) {

		return numDistinctValues(indexOf(att));
	}

	/**
	 * Returns the number of instances in the dataset.
	 *
	 * @return the number of instances in the dataset as an integer
	 */
	//@ ensures \result == m_Instances.size();
	public /*@pure@*/ int numInstances() {

		return m_Instances.size();
	}

	/**
	 * Shuffles the instances in the set so that they are ordered 
	 * randomly.
	 *
	 * @param random a random number generator
	 */
	public void randomize(Random random) {

		for (int j = numInstances() - 1; j > 0; j--)
			swap(j, random.nextInt(j+1));
	}
	
	/**
	 * Shuffles the instances in the set so that they are ordered 
	 * randomly.
	 * It uses the Randomize method of KEEL
	 */
	public void randomize() {

		for (int j = numInstances() - 1; j > 0; j--)
			swap(j, Randomize.Randint(0, j+1));
	}

	/**
	 * Reads a single instance from the reader and appends it
	 * to the dataset.  Automatically expands the dataset if it
	 * is not large enough to hold the instance. This method does
	 * not check for carriage return at the end of the line.
	 *
	 * @param reader the reader 
	 * @return false if end of file has been reached
	 * @throws IOException if the information is not read 
	 * successfully
	 * @deprecated instead of using this method in conjunction with the
	 * <code>readInstance(Reader)</code> method, one should use the 
	 * <code>ArffLoader</code> or <code>DataSource</code> class instead.
	 * @see weka.core.converters.ArffLoader
	 * @see weka.core.converters.ConverterUtils.DataSource
	 */ 
//	@Deprecated public boolean readInstance(Reader reader) throws IOException {
//
//		ArffReader arff = new ArffReader(reader, this, m_Lines, 1);
//		Instance inst = arff.readInstance(arff.getData(), false);
//		m_Lines = arff.getLineNo();
//		if (inst != null) {
//			add(inst);
//			return true;
//		}
//		else {
//			return false;
//		}
//	}    

	/**
	 * Returns the relation's name.
	 *
	 * @return the relation's name as a string
	 */
	//@ ensures \result == m_RelationName;
	public /*@pure@*/ String relationName() {

		return m_RelationName;
	}

	/**
	 * Renames an attribute. This change only affects this
	 * dataset.
	 *
	 * @param att the attribute's index (index starts with 0)
	 * @param name the new name
	 */
//	public void renameAttribute(int att, String name) {
//
//		Attribute newAtt = attribute(att).copy(name);
//		FastVector newVec = new FastVector(numAttributes());
//
//		for (int i = 0; i < numAttributes(); i++) {
//			if (i == att) {
//				newVec.addElement(newAtt);
//			} else {
//				newVec.addElement(attribute(i));
//			}
//		}
//		m_Attributes = newVec;
//	}

	/**
	 * Renames an attribute. This change only affects this
	 * dataset.
	 *
	 * @param att the attribute
	 * @param name the new name
	 */
//	public void renameAttribute(Attribute att, String name) {
//
//		renameAttribute(indexOf(att), name);
//	}

	/**
	 * Renames the value of a nominal (or string) attribute value. This
	 * change only affects this dataset.
	 *
	 * @param att the attribute's index (index starts with 0)
	 * @param val the value's index (index starts with 0)
	 * @param name the new name 
	 */
//	public void renameAttributeValue(int att, int val, String name) {
//
//		Attribute newAtt = (Attribute)attribute(att).copy();
//		FastVector newVec = new FastVector(numAttributes());
//
////		newAtt.setValue(val, name);
//		for (int i = 0; i < numAttributes(); i++) {
//			if (i == att) {
//				newVec.addElement(newAtt);
//			} else {
//				newVec.addElement(attribute(i));
//			}
//		}
//		m_Attributes = newVec;
//	}

	/**
	 * Renames the value of a nominal (or string) attribute value. This
	 * change only affects this dataset.
	 *
	 * @param att the attribute
	 * @param val the value
	 * @param name the new name
	 */
//	public void renameAttributeValue(Attribute att, String val, 
//			String name) {
//
//		int v = indexOfValue(att,val);
////		int v = att.indexOfValue(val);
//		if (v == -1) throw new IllegalArgumentException(val + " not found");
//		renameAttributeValue(indexOf(att), v, name);
//	}

	/**
	 * Creates a new dataset of the same size using random sampling
	 * with replacement.
	 *
	 * @param random a random number generator
	 * @return the new dataset
	 */
	public Instances resample(Random random) {

		Instances newData = new Instances(this, numInstances());
		while (newData.numInstances() < numInstances()) {
			newData.add(instance(random.nextInt(numInstances())));
		}
		return newData;
	}

	/**
	 * Creates a new dataset of the same size using random sampling
	 * with replacement according to the current instance weights. The
	 * weights of the instances in the new dataset are set to one.
	 *
	 * @param random a random number generator
	 * @return the new dataset
	 */
	public Instances resampleWithWeights(Random random) {

		double [] weights = new double[numInstances()];
		for (int i = 0; i < weights.length; i++) {
			weights[i] = instance(i).weight();
		}
		return resampleWithWeights(random, weights);
	}


	/**
	 * Creates a new dataset of the same size using random sampling
	 * with replacement according to the given weight vector. The
	 * weights of the instances in the new dataset are set to one.
	 * The length of the weight vector has to be the same as the
	 * number of instances in the dataset, and all weights have to
	 * be positive.
	 *
	 * @param random a random number generator
	 * @param weights the weight vector
	 * @return the new dataset
	 * @throws IllegalArgumentException if the weights array is of the wrong
	 * length or contains negative weights.
	 */
	public Instances resampleWithWeights(Random random, 
			double[] weights) {

		if (weights.length != numInstances()) {
			throw new IllegalArgumentException("weights.length != numInstances.");
		}
		Instances newData = new Instances(this, numInstances());
		if (numInstances() == 0) {
			return newData;
		}
		double[] probabilities = new double[numInstances()];
		double sumProbs = 0, sumOfWeights = Utils.sum(weights);
		for (int i = 0; i < numInstances(); i++) {
			sumProbs += random.nextDouble();
			probabilities[i] = sumProbs;
		}
		Utils.normalize(probabilities, sumProbs / sumOfWeights);

		// Make sure that rounding errors don't mess things up
		probabilities[numInstances() - 1] = sumOfWeights;
		int k = 0; int l = 0;
		sumProbs = 0;
		while ((k < numInstances() && (l < numInstances()))) {
			if (weights[l] < 0) {
				throw new IllegalArgumentException("Weights have to be positive.");
			}
			sumProbs += weights[l];
			while ((k < numInstances()) &&
					(probabilities[k] <= sumProbs)) { 
				newData.add(instance(l));
				newData.instance(k).setWeight(1);
				k++;
			}
			l++;
		}
		return newData;
	}

	/** 
	 * Sets the class attribute.
	 *
	 * @param att attribute to be the class
	 */
	public void setClass(Attribute att) {

		m_ClassIndex = indexOf(att);
	}

	/** 
	 * Sets the class index of the set.
	 * If the class index is negative there is assumed to be no class.
	 * (ie. it is undefined)
	 *
	 * @param classIndex the new class index (index starts with 0)
	 * @throws IllegalArgumentException if the class index is too big or < 0
	 */
	public void setClassIndex(int classIndex) {

		if (classIndex >= numAttributes()) {
			throw new IllegalArgumentException("Invalid class index: " + classIndex);
		}
		m_ClassIndex = classIndex;
	}

	/**
	 * Sets the relation's name.
	 *
	 * @param newName the new relation name.
	 */
	public void setRelationName(/*@non_null@*/String newName) {

		m_RelationName = newName;
	}

	/**
	 * Sorts the instances based on an attribute. For numeric attributes, 
	 * instances are sorted in ascending order. For nominal attributes, 
	 * instances are sorted based on the attribute label ordering 
	 * specified in the header. Instances with missing values for the 
	 * attribute are placed at the end of the dataset.
	 *
	 * @param attIndex the attribute's index (index starts with 0)
	 */
	public void sort(int attIndex) {

		int i,j;

		// move all instances with missing values to end
		j = numInstances() - 1;
		i = 0;
		while (i <= j) {
			if (instance(j).isMissing(attIndex)) {
				j--;
			} else {
				if (instance(i).isMissing(attIndex)) {
					swap(i,j);
					j--;
				}
				i++;
			}
		}
		quickSort(attIndex, 0, j);
	}

	/**
	 * Sorts the instances based on an attribute. For numeric attributes, 
	 * instances are sorted into ascending order. For nominal attributes, 
	 * instances are sorted based on the attribute label ordering 
	 * specified in the header. Instances with missing values for the 
	 * attribute are placed at the end of the dataset.
	 *
	 * @param att the attribute
	 */
	public void sort(Attribute att) {

		sort(indexOf(att));
	}

	/**
	 * Stratifies a set of instances according to its class values 
	 * if the class attribute is nominal (so that afterwards a 
	 * stratified cross-validation can be performed).
	 *
	 * @param numFolds the number of folds in the cross-validation
	 * @throws UnassignedClassException if the class is not set
	 */
	public void stratify(int numFolds) {

		if (numFolds <= 0) {
			throw new IllegalArgumentException("Number of folds must be greater than 1");
		}
		if (m_ClassIndex < 0) {
			throw new UnassignedClassException("Class index is negative (not set)!");
		}
		if (classAttribute().getType()==Attribute.NOMINAL) {

			// sort by class
			int index = 1;
			while (index < numInstances()) {
				Instance instance1 = instance(index - 1);
				for (int j = index; j < numInstances(); j++) {
					Instance instance2 = instance(j);
					if ((instance1.classValue() == instance2.classValue()) ||
							(instance1.classIsMissing() && 
									instance2.classIsMissing())) {
						swap(index,j);
						index++;
					}
				}
				index++;
			}
			stratStep(numFolds);
		}
	}

	/**
	 * Computes the sum of all the instances' weights.
	 *
	 * @return the sum of all the instances' weights as a double
	 */
	public /*@pure@*/ double sumOfWeights() {

		double sum = 0;

		for (int i = 0; i < numInstances(); i++) {
			sum += instance(i).weight();
		}
		return sum;
	}

	/**
	 * Creates the test set for one fold of a cross-validation on 
	 * the dataset.
	 *
	 * @param numFolds the number of folds in the cross-validation. Must
	 * be greater than 1.
	 * @param numFold 0 for the first fold, 1 for the second, ...
	 * @return the test set as a set of weighted instances
	 * @throws IllegalArgumentException if the number of folds is less than 2
	 * or greater than the number of instances.
	 */
	//@ requires 2 <= numFolds && numFolds < numInstances();
	//@ requires 0 <= numFold && numFold < numFolds;
	public Instances testCV(int numFolds, int numFold) {

		int numInstForFold, first, offset;
		Instances test;

		if (numFolds < 2) {
			throw new IllegalArgumentException("Number of folds must be at least 2!");
		}
		if (numFolds > numInstances()) {
			throw new IllegalArgumentException("Can't have more folds than instances!");
		}
		numInstForFold = numInstances() / numFolds;
		if (numFold < numInstances() % numFolds){
			numInstForFold++;
			offset = numFold;
		}else
			offset = numInstances() % numFolds;
		test = new Instances(this, numInstForFold);
		first = numFold * (numInstances() / numFolds) + offset;
		copyInstances(first, test, numInstForFold);
		return test;
	}

	/**
	 * Returns the dataset as a string in ARFF format. Strings
	 * are quoted if they contain whitespace characters, or if they
	 * are a question mark.
	 *
	 * @return the dataset in ARFF format as a string
	 */
	public String toString() {

		StringBuffer text = new StringBuffer();

		text.append(ARFF_RELATION).append(" ").
		append(Utils.quote(m_RelationName)).append("\n\n");
		for (int i = 0; i < numAttributes(); i++) {
			text.append(attribute(i)).append("\n");
		}
		text.append("\n").append(ARFF_DATA).append("\n");

		text.append(stringWithoutHeader());
		return text.toString();
	}

	/**
	 * Returns the instances in the dataset as a string in ARFF format. Strings
	 * are quoted if they contain whitespace characters, or if they
	 * are a question mark.
	 *
	 * @return the dataset in ARFF format as a string
	 */
	protected String stringWithoutHeader() {

		StringBuffer text = new StringBuffer();

		for (int i = 0; i < numInstances(); i++) {
			text.append(instance(i));
			if (i < numInstances() - 1) {
				text.append('\n');
			}
		}
		return text.toString();
	}

	/**
	 * Creates the training set for one fold of a cross-validation 
	 * on the dataset. 
	 *
	 * @param numFolds the number of folds in the cross-validation. Must
	 * be greater than 1.
	 * @param numFold 0 for the first fold, 1 for the second, ...
	 * @return the training set 
	 * @throws IllegalArgumentException if the number of folds is less than 2
	 * or greater than the number of instances.
	 */
	//@ requires 2 <= numFolds && numFolds < numInstances();
	//@ requires 0 <= numFold && numFold < numFolds;
	public Instances trainCV(int numFolds, int numFold) {

		int numInstForFold, first, offset;
		Instances train;

		if (numFolds < 2) {
			throw new IllegalArgumentException("Number of folds must be at least 2!");
		}
		if (numFolds > numInstances()) {
			throw new IllegalArgumentException("Can't have more folds than instances!");
		}
		numInstForFold = numInstances() / numFolds;
		if (numFold < numInstances() % numFolds) {
			numInstForFold++;
			offset = numFold;
		}else
			offset = numInstances() % numFolds;
		train = new Instances(this, numInstances() - numInstForFold);
		first = numFold * (numInstances() / numFolds) + offset;
		copyInstances(0, train, first);
		copyInstances(first + numInstForFold, train,
				numInstances() - first - numInstForFold);

		return train;
	}

	/**
	 * Creates the training set for one fold of a cross-validation 
	 * on the dataset. The data is subsequently randomized based
	 * on the given random number generator.
	 *
	 * @param numFolds the number of folds in the cross-validation. Must
	 * be greater than 1.
	 * @param numFold 0 for the first fold, 1 for the second, ...
	 * @param random the random number generator
	 * @return the training set 
	 * @throws IllegalArgumentException if the number of folds is less than 2
	 * or greater than the number of instances.
	 */
	//@ requires 2 <= numFolds && numFolds < numInstances();
	//@ requires 0 <= numFold && numFold < numFolds;
	public Instances trainCV(int numFolds, int numFold, Random random) {

		Instances train = trainCV(numFolds, numFold);
		train.randomize(random);
		return train;
	}

	/**
	 * Computes the variance for a numeric attribute.
	 *
	 * @param attIndex the numeric attribute (index starts with 0)
	 * @return the variance if the attribute is numeric
	 * @throws IllegalArgumentException if the attribute is not numeric
	 */
	public /*@pure@*/ double variance(int attIndex) {

		double sum = 0, sumSquared = 0, sumOfWeights = 0;

		if (attribute(attIndex).getType()==Attribute.NOMINAL) {
			throw new IllegalArgumentException("Can't compute variance because attribute is " +
			"not numeric!");
		}
		for (int i = 0; i < numInstances(); i++) {
			if (!instance(i).isMissing(attIndex)) {
				sum += instance(i).weight() * 
				instance(i).value(attIndex);
				sumSquared += instance(i).weight() * 
				instance(i).value(attIndex) *
				instance(i).value(attIndex);
				sumOfWeights += instance(i).weight();
			}
		}
		if (sumOfWeights <= 1) {
			return 0;
		}
		double result = (sumSquared - (sum * sum / sumOfWeights)) / 
		(sumOfWeights - 1);

		// We don't like negative variance
		if (result < 0) {
			return 0;
		} else {
			return result;
		}
	}

	/**
	 * Computes the variance for a numeric attribute.
	 *
	 * @param att the numeric attribute
	 * @return the variance if the attribute is numeric
	 * @throws IllegalArgumentException if the attribute is not numeric
	 */
	public /*@pure@*/ double variance(Attribute att) {

		return variance(indexOf(att));
	}

	/**
	 * Calculates summary statistics on the values that appear in this
	 * set of instances for a specified attribute.
	 *
	 * @param index the index of the attribute to summarize (index starts with 0)
	 * @return an AttributeStats object with it's fields calculated.
	 */
	//@ requires 0 <= index && index < numAttributes();
//	public AttributeStats attributeStats(int index) {
//
//		AttributeStats result = new AttributeStats();
//		if (attribute(index).isNominal()) {
//			result.nominalCounts = new int [attribute(index).numValues()];
//		}
//		if (attribute(index).isNumeric()) {
//			result.numericStats = new weka.experiment.Stats();
//		}
//		result.totalCount = numInstances();
//
//		double [] attVals = attributeToDoubleArray(index);
//		int [] sorted = Utils.sort(attVals);
//		int currentCount = 0;
//		double prev = Instance.missingValue();
//		for (int j = 0; j < numInstances(); j++) {
//			Instance current = instance(sorted[j]);
//			if (current.isMissing(index)) {
//				result.missingCount = numInstances() - j;
//				break;
//			}
//			if (current.value(index) == prev) {
//				currentCount++;
//			} else {
//				result.addDistinct(prev, currentCount);
//				currentCount = 1;
//				prev = current.value(index);
//			}
//		}
//		result.addDistinct(prev, currentCount);
//		result.distinctCount--; // So we don't count "missing" as a value 
//		return result;
//	}

	/**
	 * Gets the value of all instances in this dataset for a particular
	 * attribute. Useful in conjunction with Utils.sort to allow iterating
	 * through the dataset in sorted order for some attribute.
	 *
	 * @param index the index of the attribute.
	 * @return an array containing the value of the desired attribute for
	 * each instance in the dataset. 
	 */
	//@ requires 0 <= index && index < numAttributes();
	public /*@pure@*/ double [] attributeToDoubleArray(int index) {

		double [] result = new double[numInstances()];
		for (int i = 0; i < result.length; i++) {
			result[i] = instance(i).value(index);
		}
		return result;
	}

	/**
	 * Generates a string summarizing the set of instances. Gives a breakdown
	 * for each attribute indicating the number of missing/discrete/unique
	 * values and other information.
	 *
	 * @return a string summarizing the dataset
	 */
//	public String toSummaryString() {
//
//		StringBuffer result = new StringBuffer();
//		result.append("Relation Name:  ").append(relationName()).append('\n');
//		result.append("Num Instances:  ").append(numInstances()).append('\n');
//		result.append("Num Attributes: ").append(numAttributes()).append('\n');
//		result.append('\n');
//
//		result.append(Utils.padLeft("", 5)).append(Utils.padRight("Name", 25));
//		result.append(Utils.padLeft("Type", 5)).append(Utils.padLeft("Nom", 5));
//		result.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5));
//		result.append(Utils.padLeft("Missing", 12));
//		result.append(Utils.padLeft("Unique", 12));
//		result.append(Utils.padLeft("Dist", 6)).append('\n');
//		for (int i = 0; i < numAttributes(); i++) {
//			Attribute a = attribute(i);
//			AttributeStats as = attributeStats(i);
//			result.append(Utils.padLeft("" + (i + 1), 4)).append(' ');
//			result.append(Utils.padRight(a.name(), 25)).append(' ');
//			long percent;
//			switch (a.type()) {
//			case Attribute.NOMINAL:
//				result.append(Utils.padLeft("Nom", 4)).append(' ');
//				percent = Math.round(100.0 * as.intCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				result.append(Utils.padLeft("" + 0, 3)).append("% ");
//				percent = Math.round(100.0 * as.realCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				break;
//			case Attribute.NUMERIC:
//				result.append(Utils.padLeft("Num", 4)).append(' ');
//				result.append(Utils.padLeft("" + 0, 3)).append("% ");
//				percent = Math.round(100.0 * as.intCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				percent = Math.round(100.0 * as.realCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				break;
//			case Attribute.DATE:
//				result.append(Utils.padLeft("Dat", 4)).append(' ');
//				result.append(Utils.padLeft("" + 0, 3)).append("% ");
//				percent = Math.round(100.0 * as.intCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				percent = Math.round(100.0 * as.realCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				break;
//			case Attribute.STRING:
//				result.append(Utils.padLeft("Str", 4)).append(' ');
//				percent = Math.round(100.0 * as.intCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				result.append(Utils.padLeft("" + 0, 3)).append("% ");
//				percent = Math.round(100.0 * as.realCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				break;
//			case Attribute.RELATIONAL:
//				result.append(Utils.padLeft("Rel", 4)).append(' ');
//				percent = Math.round(100.0 * as.intCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				result.append(Utils.padLeft("" + 0, 3)).append("% ");
//				percent = Math.round(100.0 * as.realCount / as.totalCount);
//				result.append(Utils.padLeft("" + percent, 3)).append("% ");
//				break;
//			default:
//				result.append(Utils.padLeft("???", 4)).append(' ');
//			result.append(Utils.padLeft("" + 0, 3)).append("% ");
//			percent = Math.round(100.0 * as.intCount / as.totalCount);
//			result.append(Utils.padLeft("" + percent, 3)).append("% ");
//			percent = Math.round(100.0 * as.realCount / as.totalCount);
//			result.append(Utils.padLeft("" + percent, 3)).append("% ");
//			break;
//			}
//			result.append(Utils.padLeft("" + as.missingCount, 5)).append(" /");
//			percent = Math.round(100.0 * as.missingCount / as.totalCount);
//			result.append(Utils.padLeft("" + percent, 3)).append("% ");
//			result.append(Utils.padLeft("" + as.uniqueCount, 5)).append(" /");
//			percent = Math.round(100.0 * as.uniqueCount / as.totalCount);
//			result.append(Utils.padLeft("" + percent, 3)).append("% ");
//			result.append(Utils.padLeft("" + as.distinctCount, 5)).append(' ');
//			result.append('\n');
//		}
//		return result.toString();
//	}

	/**
	 * Copies instances from one set to the end of another 
	 * one.
	 *
	 * @param from the position of the first instance to be copied
	 * @param dest the destination for the instances
	 * @param num the number of instances to be copied
	 */
	//@ requires 0 <= from && from <= numInstances() - num;
	//@ requires 0 <= num;
	protected void copyInstances(int from, /*@non_null@*/ Instances dest, int num) {

		for (int i = 0; i < num; i++) {
			dest.add(instance(from + i));
		}
	}

	/**
	 * Replaces the attribute information by a clone of
	 * itself.
	 */
	protected void freshAttributeInfo() {

		m_Attributes = (FastVector) m_Attributes.copyElements();
	}

	/**
	 * Returns string including all instances, their weights and
	 * their indices in the original dataset.
	 *
	 * @return description of instance and its weight as a string
	 */
	protected /*@pure@*/ String instancesAndWeights(){

		StringBuffer text = new StringBuffer();

		for (int i = 0; i < numInstances(); i++) {
			text.append(instance(i) + " " + instance(i).weight());
			if (i < numInstances() - 1) {
				text.append("\n");
			}
		}
		return text.toString();
	}

	/**
	 * Partitions the instances around a pivot. Used by quicksort and
	 * kthSmallestValue.
	 *
	 * @param attIndex the attribute's index (index starts with 0)
	 * @param l the first index of the subset (index starts with 0)
	 * @param r the last index of the subset (index starts with 0)
	 *
	 * @return the index of the middle element
	 */
	//@ requires 0 <= attIndex && attIndex < numAttributes();
	//@ requires 0 <= left && left <= right && right < numInstances();
	protected int partition(int attIndex, int l, int r) {

		double pivot = instance((l + r) / 2).value(attIndex);

		while (l < r) {
			while ((instance(l).value(attIndex) < pivot) && (l < r)) {
				l++;
			}
			while ((instance(r).value(attIndex) > pivot) && (l < r)) {
				r--;
			}
			if (l < r) {
				swap(l, r);
				l++;
				r--;
			}
		}
		if ((l == r) && (instance(r).value(attIndex) > pivot)) {
			r--;
		} 

		return r;
	}

	/**
	 * Implements quicksort according to Manber's "Introduction to
	 * Algorithms".
	 *
	 * @param attIndex the attribute's index (index starts with 0)
	 * @param left the first index of the subset to be sorted (index starts with 0)
	 * @param right the last index of the subset to be sorted (index starts with 0)
	 */
	//@ requires 0 <= attIndex && attIndex < numAttributes();
	//@ requires 0 <= first && first <= right && right < numInstances();
	protected void quickSort(int attIndex, int left, int right) {

		if (left < right) {
			int middle = partition(attIndex, left, right);
			quickSort(attIndex, left, middle);
			quickSort(attIndex, middle + 1, right);
		}
	}

	/**
	 * Implements computation of the kth-smallest element according
	 * to Manber's "Introduction to Algorithms".
	 *
	 * @param attIndex the attribute's index (index starts with 0)
	 * @param left the first index of the subset (index starts with 0)
	 * @param right the last index of the subset (index starts with 0)
	 * @param k the value of k
	 *
	 * @return the index of the kth-smallest element
	 */
	//@ requires 0 <= attIndex && attIndex < numAttributes();
	//@ requires 0 <= first && first <= right && right < numInstances();
	protected int select(int attIndex, int left, int right, int k) {

		if (left == right) {
			return left;
		} else {
			int middle = partition(attIndex, left, right);
			if ((middle - left + 1) >= k) {
				return select(attIndex, left, middle, k);
			} else {
				return select(attIndex, middle + 1, right, k - (middle - left + 1));
			}
		}
	}

	/**
	 * Help function needed for stratification of set.
	 *
	 * @param numFolds the number of folds for the stratification
	 */
	protected void stratStep (int numFolds){

		FastVector newVec = new FastVector(m_Instances.capacity());
		int start = 0, j;

		// create stratified batch
		while (newVec.size() < numInstances()) {
			j = start;
			while (j < numInstances()) {
				newVec.addElement(instance(j));
				j = j + numFolds;
			}
			start++;
		}
		m_Instances = newVec;
	}

	/**
	 * Swaps two instances in the set.
	 *
	 * @param i the first instance's index (index starts with 0)
	 * @param j the second instance's index (index starts with 0)
	 */
	//@ requires 0 <= i && i < numInstances();
	//@ requires 0 <= j && j < numInstances();
	public void swap(int i, int j){

		m_Instances.swap(i, j);
	}

	/**
	 * Merges two sets of Instances together. The resulting set will have
	 * all the attributes of the first set plus all the attributes of the 
	 * second set. The number of instances in both sets must be the same.
	 *
	 * @param first the first set of Instances
	 * @param second the second set of Instances
	 * @return the merged set of Instances
	 * @throws IllegalArgumentException if the datasets are not the same size
	 */
	public static Instances mergeInstances(Instances first, Instances second) {

		if (first.numInstances() != second.numInstances()) {
			throw new IllegalArgumentException("Instance sets must be of the same size");
		}

		// Create the vector of merged attributes
		FastVector newAttributes = new FastVector();
		for (int i = 0; i < first.numAttributes(); i++) {
			newAttributes.addElement(first.attribute(i));
		}
		for (int i = 0; i < second.numAttributes(); i++) {
			newAttributes.addElement(second.attribute(i));
		}

		// Create the set of Instances
		Instances merged = new Instances(first.relationName() + '_'
				+ second.relationName(), 
				newAttributes, 
				first.numInstances());
		// Merge each instance
		for (int i = 0; i < first.numInstances(); i++) {
			merged.add(first.instance(i).mergeInstance(second.instance(i)));
		}
		return merged;
	}

	/**
	 * Method for testing this class.
	 *
	 * @param argv should contain one element: the name of an ARFF file
	 */
	//@ requires argv != null;
	//@ requires argv.length == 1;
	//@ requires argv[0] != null;
//	public static void test(String [] argv) {
//
//		Instances instances, secondInstances, train, test, empty;
//		Random random = new Random(2);
//		Reader reader;
//		int start, num;
//		FastVector testAtts, testVals;
//		int i,j;
//
//		try{
//			if (argv.length > 1) {
//				throw (new Exception("Usage: Instances [<filename>]"));
//			}
//
//			// Creating set of instances from scratch
//			testVals = new FastVector(2);
//			testVals.addElement("first_value");
//			testVals.addElement("second_value");
//			testAtts = new FastVector(2);
//			testAtts.addElement(new Attribute("nominal_attribute", testVals));
//			testAtts.addElement(new Attribute("numeric_attribute"));
//			instances = new Instances("test_set", testAtts, 10);
//			instances.add(new Instance(instances.numAttributes()));
//			instances.add(new Instance(instances.numAttributes()));
//			instances.add(new Instance(instances.numAttributes()));
//			instances.setClassIndex(0);
//			System.out.println("\nSet of instances created from scratch:\n");
//			System.out.println(instances);
//
//			if (argv.length == 1) {
//				String filename = argv[0];
//				reader = new FileReader(filename);
//
//				// Read first five instances and print them
//				System.out.println("\nFirst five instances from file:\n");
//				instances = new Instances(reader, 1);
//				instances.setClassIndex(instances.numAttributes() - 1);
//				i = 0;
//				while ((i < 5) && (instances.readInstance(reader))) {
//					i++;
//				}
//				System.out.println(instances);
//
//				// Read all the instances in the file
//				reader = new FileReader(filename);
//				instances = new Instances(reader);
//
//				// Make the last attribute be the class 
//				instances.setClassIndex(instances.numAttributes() - 1);
//
//				// Print header and instances.
//				System.out.println("\nDataset:\n");
//				System.out.println(instances);
//				System.out.println("\nClass index: "+instances.classIndex());
//			}
//
//			// Test basic methods based on class index.
//			System.out.println("\nClass name: "+instances.classAttribute().name());
//			System.out.println("\nClass index: "+instances.classIndex());
//			System.out.println("\nClass is nominal: " +
//					instances.classAttribute().isNominal());
//			System.out.println("\nClass is numeric: " +
//					instances.classAttribute().isNumeric());
//			System.out.println("\nClasses:\n");
//			for (i = 0; i < instances.numClasses(); i++) {
//				System.out.println(instances.classAttribute().value(i));
//			}
//			System.out.println("\nClass values and labels of instances:\n");
//			for (i = 0; i < instances.numInstances(); i++) {
//				Instance inst = instances.instance(i);
//				System.out.print(inst.classValue() + "\t");
//				System.out.print(inst.toString(inst.classIndex()));
//				if (instances.instance(i).classIsMissing()) {
//					System.out.println("\tis missing");
//				} else {
//					System.out.println();
//				}
//			}
//
//			// Create random weights.
//			System.out.println("\nCreating random weights for instances.");
//			for (i = 0; i < instances.numInstances(); i++) {
//				instances.instance(i).setWeight(random.nextDouble()); 
//			}
//
//			// Print all instances and their weights (and the sum of weights).
//			System.out.println("\nInstances and their weights:\n");
//			System.out.println(instances.instancesAndWeights());
//			System.out.print("\nSum of weights: ");
//			System.out.println(instances.sumOfWeights());
//
//			// Insert an attribute
//			secondInstances = new Instances(instances);
//			Attribute testAtt = new Attribute("Inserted");
//			secondInstances.insertAttributeAt(testAtt, 0);
//			System.out.println("\nSet with inserted attribute:\n");
//			System.out.println(secondInstances);
//			System.out.println("\nClass name: "
//					+ secondInstances.classAttribute().name());
//
//			// Delete the attribute
//			secondInstances.deleteAttributeAt(0);
//			System.out.println("\nSet with attribute deleted:\n");
//			System.out.println(secondInstances);
//			System.out.println("\nClass name: "
//					+ secondInstances.classAttribute().name());
//
//			// Test if headers are equal
//			System.out.println("\nHeaders equal: "+
//					instances.equalHeaders(secondInstances) + "\n");
//
//			// Print data in internal format.
//			System.out.println("\nData (internal values):\n");
//			for (i = 0; i < instances.numInstances(); i++) {
//				for (j = 0; j < instances.numAttributes(); j++) {
//					if (instances.instance(i).isMissing(j)) {
//						System.out.print("? ");
//					} else {
//						System.out.print(instances.instance(i).value(j) + " ");
//					}
//				}
//				System.out.println();
//			}
//
//			// Just print header
//			System.out.println("\nEmpty dataset:\n");
//			empty = new Instances(instances, 0);
//			System.out.println(empty);
//			System.out.println("\nClass name: "+empty.classAttribute().name());
//
//			// Create copy and rename an attribute and a value (if possible)
//			if (empty.classAttribute().isNominal()) {
//				Instances copy = new Instances(empty, 0);
//				copy.renameAttribute(copy.classAttribute(), "new_name");
//				copy.renameAttributeValue(copy.classAttribute(), 
//						copy.classAttribute().value(0), 
//				"new_val_name");
//				System.out.println("\nDataset with names changed:\n" + copy);
//				System.out.println("\nOriginal dataset:\n" + empty);
//			}
//
//			// Create and prints subset of instances.
//			start = instances.numInstances() / 4;
//			num = instances.numInstances() / 2;
//			System.out.print("\nSubset of dataset: ");
//			System.out.println(num + " instances from " + (start + 1) 
//					+ ". instance");
//			secondInstances = new Instances(instances, start, num);
//			System.out.println("\nClass name: "
//					+ secondInstances.classAttribute().name());
//
//			// Print all instances and their weights (and the sum of weights).
//			System.out.println("\nInstances and their weights:\n");
//			System.out.println(secondInstances.instancesAndWeights());
//			System.out.print("\nSum of weights: ");
//			System.out.println(secondInstances.sumOfWeights());
//
//			// Create and print training and test sets for 3-fold
//			// cross-validation.
//			System.out.println("\nTrain and test folds for 3-fold CV:");
//			if (instances.classAttribute().isNominal()) {
//				instances.stratify(3);
//			}
//			for (j = 0; j < 3; j++) {
//				train = instances.trainCV(3,j, new Random(1));
//				test = instances.testCV(3,j);
//
//				// Print all instances and their weights (and the sum of weights).
//				System.out.println("\nTrain: ");
//				System.out.println("\nInstances and their weights:\n");
//				System.out.println(train.instancesAndWeights());
//				System.out.print("\nSum of weights: ");
//				System.out.println(train.sumOfWeights());
//				System.out.println("\nClass name: "+train.classAttribute().name());
//				System.out.println("\nTest: ");
//				System.out.println("\nInstances and their weights:\n");
//				System.out.println(test.instancesAndWeights());
//				System.out.print("\nSum of weights: ");
//				System.out.println(test.sumOfWeights());
//				System.out.println("\nClass name: "+test.classAttribute().name());
//			}
//
//			// Randomize instances and print them.
//			System.out.println("\nRandomized dataset:");
//			instances.randomize(random);
//
//			// Print all instances and their weights (and the sum of weights).
//			System.out.println("\nInstances and their weights:\n");
//			System.out.println(instances.instancesAndWeights());
//			System.out.print("\nSum of weights: ");
//			System.out.println(instances.sumOfWeights());
//
//			// Sort instances according to first attribute and
//			// print them.
//			System.out.print("\nInstances sorted according to first attribute:\n ");
//			instances.sort(0);
//
//			// Print all instances and their weights (and the sum of weights).
//			System.out.println("\nInstances and their weights:\n");
//			System.out.println(instances.instancesAndWeights());
//			System.out.print("\nSum of weights: ");
//			System.out.println(instances.sumOfWeights());
//		} catch (Exception e) {
//			e.printStackTrace(); 
//		}
//	}

	/**
	 * Main method for this class. The following calls are possible:
	 * <ul>
	 *   <li>
	 *     <code>weka.core.Instances</code> help<br/>
	 *     prints a short list of possible commands.
	 *   </li>
	 *   <li>
	 *     <code>weka.core.Instances</code> &lt;filename&gt;<br/>
	 *     prints a summary of a set of instances.
	 *   </li>
	 *   <li>
	 *     <code>weka.core.Instances</code> merge &lt;filename1&gt; &lt;filename2&gt;<br/>
	 *     merges the two datasets (must have same number of instances) and
	 *     outputs the results on stdout.
	 *   </li>
	 *   <li>
	 *     <code>weka.core.Instances</code> append &lt;filename1&gt; &lt;filename2&gt;<br/>
	 *     appends the second dataset to the first one (must have same headers) and
	 *     outputs the results on stdout.
	 *   </li>
	 *   <li>
	 *     <code>weka.core.Instances</code> randomize &lt;seed&gt; &lt;filename&gt;<br/>
	 *     randomizes the dataset with the given seed and outputs the result on stdout.
	 *   </li>
	 * </ul>
	 *
	 * @param args 	the commandline parameters
	 */
//	public static void main(String[] args) {
//
//		try {
//			Instances i;
//			// read from stdin and print statistics
//			if (args.length == 0) {
//				DataSource source = new DataSource(System.in);
//				i = source.getDataSet();
//				System.out.println(i.toSummaryString());
//			}
//			// read file and print statistics
//			else if ((args.length == 1) && (!args[0].equals("-h")) && (!args[0].equals("help"))) {
//				DataSource source = new DataSource(args[0]);
//				i = source.getDataSet();
//				System.out.println(i.toSummaryString());
//			}
//			// read two files, merge them and print result to stdout
//			else if ((args.length == 3) && (args[0].toLowerCase().equals("merge"))) {
//				DataSource source1 = new DataSource(args[1]);
//				DataSource source2 = new DataSource(args[2]);
//				i = Instances.mergeInstances(source1.getDataSet(), source2.getDataSet());
//				System.out.println(i);
//			}
//			// read two files, append them and print result to stdout
//			else if ((args.length == 3) && (args[0].toLowerCase().equals("append"))) {
//				DataSource source1 = new DataSource(args[1]);
//				DataSource source2 = new DataSource(args[2]);
//				if (!source1.getStructure().equalHeaders(source2.getStructure()))
//					throw new Exception("The two datasets have different headers!");
//				Instances structure = source1.getStructure();
//				System.out.println(source1.getStructure());
//				while (source1.hasMoreElements(structure))
//					System.out.println(source1.nextElement(structure));
//				structure = source2.getStructure();
//				while (source2.hasMoreElements(structure))
//					System.out.println(source2.nextElement(structure));
//			}
//			// read file and seed value, randomize data and print result to stdout
//			else if ((args.length == 3) && (args[0].toLowerCase().equals("randomize"))) {
//				DataSource source = new DataSource(args[2]);
//				i = source.getDataSet();
//				i.randomize(new Random(Integer.parseInt(args[1])));
//				System.out.println(i);
//			}
//			// wrong parameters
//			else {
//				System.err.println(
//						"\nUsage:\n"
//						+ "\tweka.core.Instances help\n"
//						+ "\tweka.core.Instances <filename>\n"
//						+ "\tweka.core.Instances merge <filename1> <filename2>\n"
//						+ "\tweka.core.Instances append <filename1> <filename2>\n"
//						+ "\tweka.core.Instances randomize <seed> <filename>\n"
//				);
//				System.exit(1);
//			}
//		}
//		catch (Exception ex) {
//			ex.printStackTrace();
//			System.err.println(ex.getMessage());
//		}
//	}
	
            /**
     * Returns the index of the attribute given.
     * @param att attribute given.
     * @return the index of the attribute given.
     */
	protected int indexOf(Attribute att){
		int index = -1;
		for(int i=0;i<Attributes.getNumAttributes() && index==-1;i++){
			if(Attributes.getAttribute(i)==att)
				index = i;
		}
		return index;
	}
	
    /**
     * Returns the index of the value given of the attribute given.
     * @param att attribute given.
     * @param value value given.
     * @return the index of the value given of the attribute given.
     */
    protected int indexOfValue(Attribute att,String value){
		Vector vals = att.getNominalValuesList();
		String s;
		int index = -1;
		for(int i=0;i<vals.size() && index == -1;i++){
			s = (String)vals.get(i);
			if(value.compareTo(s)==0)
				index = i;
		}
		
		return index;
	}
}

